library(lmtest)
library(plm)
library(RCurl)
library(lme4)
library(tidyverse)
library(lazyeval)
library(magrittr)
library(openxlsx)
library(foreign)

# Bind the dplyr versions of these verbs in the global environment, which is
# searched before any attached package.
select <- dplyr::select
contains <- dplyr::contains

# put in your directory here
setwd("")

# reading in contributions and candidate data
# read.dta13() lives in the readstata13 package, which is not attached above,
# so it is called through its namespace. The result is kept as outcomes_all
# because every later step reads from that object.
outcomes_all <- readstata13::read.dta13("./Data/did_gender.dta")

# creating long dataset
# outcomes_all is stacked once per gender code (i), race code (j) and party
# (k). Each slice gets generic race_gender_* outcome columns plus same-race /
# same-gender indicators copied from the columns matching that i/j/k
# combination. Slices are collected in a list and bound once at the end
# instead of growing the data frame with rbind() on every iteration.
long_slices <- vector("list", 2 * 4 * 2)
slice_no <- 0
for (i in c('0', '1')) {
  for (j in c('A', 'B', 'H', 'W')) {
    for (k in c('d', 'r')) {
      group_suffix <- paste0('.', j, '.', i, '.', k)
      temp <- outcomes_all
      general_total <- temp[, paste0('general.total', group_suffix)]

      temp$race_gender_share <- temp[, paste0('general.sharetotal', group_suffix)]
      temp$race_gender_total <- general_total
      temp$race_gender_logtotal <- temp[, paste0('general.logtotal', group_suffix)]
      # log of the general + primary total for this group
      temp$race_gender_total_all <- log(general_total + temp[, paste0('primary.total', group_suffix)])
      temp$id <- paste0(j, i)
      temp$party <- k

      # Indicators for the current cycle ('') and the lead1-lead3 versions of
      # the candidate columns; created in the order same_race, same_gender,
      # lead1_same_race, ... so contains('same_') selects them in that order.
      for (shift in c('', 'lead1', 'lead2', 'lead3')) {
        src_prefix <- if (shift == '') 'cand_' else paste0(shift, '.cand_')
        out_prefix <- if (shift == '') 'same_' else paste0(shift, '_same_')
        temp[[paste0(out_prefix, 'race')]] <- ifelse(temp[, paste0(src_prefix, j, '.', k)] == 1, 1, 0)
        temp[[paste0(out_prefix, 'gender')]] <- ifelse(temp[, paste0(src_prefix, i, '.', k)] == 1, 1, 0)
      }
      rownames(temp) <- NULL

      # Keep this party's candidate covariates and strip the party suffix so
      # the 'd' and 'r' slices share column names.
      slice_no <- slice_no + 1
      long_slices[[slice_no]] <- if (k == 'd') {
        temp %>%
          select(cycle, district_final, party, race_gender_share, race_gender_logtotal, race_gender_total, race_gender_total_all, id,
                 contains('same_'), starts_with('d_'), starts_with('pct_'), inc.d, Open.Seat.d:military.d, cand_B.d:cand_1.d) %>%
          rename_at(vars(inc.d:cand_1.d), function(x){str_remove_all(x, '\\.d$')})
      } else {
        temp %>%
          select(cycle, district_final, party, race_gender_share, race_gender_logtotal, race_gender_total, race_gender_total_all, id,
                 contains('same_'), starts_with('d_'), starts_with('pct_'), inc.r, Open.Seat.r:military.r, cand_B.r:cand_1.r) %>%
          rename_at(vars(inc.r:cand_1.r), function(x){str_remove_all(x, '\\.r$')})
      }
    }
  }
}
outcomes_long <- do.call(rbind, long_slices)
write.dta(outcomes_long, "./Data/did_gender_long.dta")

# Mean contribution total by same-race / same-gender status.
# The summary column is named explicitly and TRUE is spelled out (T is an
# ordinary, reassignable variable).
outcomes_long %>%
  group_by(same_race, same_gender) %>%
  summarise(mean_total = mean(race_gender_total, na.rm = TRUE))

# Same table with the "W0" group excluded.
outcomes_long %>%
  filter(id != "W0") %>%
  group_by(same_race, same_gender) %>%
  summarise(mean_total = mean(race_gender_total, na.rm = TRUE))

# Broken out by group id.
outcomes_long %>%
  group_by(same_race, same_gender, id) %>%
  summarise(mean_total = mean(race_gender_total, na.rm = TRUE))

outcomes_long %>%
  group_by(same_race, id) %>%
  summarise(mean_total = mean(race_gender_total, na.rm = TRUE))

outcomes_long %>%
  group_by(same_gender, id) %>%
  summarise(mean_total = mean(race_gender_total, na.rm = TRUE))

# Event-time indicators (-1 = cycle before, 0 = event cycle, 1 = cycle after)
# around cycles where cand_1 == 1 and/or cand_W == 0; rows matching none of
# the conditions get NA.
# outcomes_long holds one row per district, id, party and cycle, so lag() and
# lead() are taken within district_final x id x party. Without the grouping
# they would pull values from the other party's row in the same cycle and
# from neighbouring districts/ids.
# NOTE(review): a one-row shift equals "adjacent election cycle" only if every
# series has consecutive cycles - confirm against the data.
plotdata <- outcomes_long %>%
  arrange(district_final, id, party, cycle) %>%
  group_by(district_final, id, party) %>%
  mutate(lag_same_gender = dplyr::lag(cand_1, default = 0),
         lag_same_race = dplyr::lag(1 - cand_W, default = 0),
         lead_same_gender = dplyr::lead(cand_1, default = 0),
         lead_same_race = dplyr::lead(1 - cand_W, default = 0),
         time_gender = case_when(cand_1 == 1 ~ 0,
                                 lag_same_gender == 1 ~ 1,
                                 lead_same_gender == 1 ~ -1),
         time_race = case_when(cand_W == 0 ~ 0,
                               lag_same_race == 1 ~ 1,
                               lead_same_race == 1 ~ -1),
         time_race_gender = case_when(cand_1 == 1 & cand_W == 0 ~ 0,
                                      lag_same_race == 1 & lag_same_gender == 1 ~ 1,
                                      lead_same_race == 1 & lead_same_gender == 1 ~ -1)) %>%
  ungroup()

# Parallel-trends figure for gender: mean contribution total by event time
# (time_gender), one line per same_gender value. Rows with a missing
# same_gender or time_gender are dropped before averaging.
ggplot(
  data = plotdata %>%
    filter(!is.na(same_gender), !is.na(time_gender)) %>%
    group_by(time_gender, same_gender) %>%
    summarise(mean = mean(race_gender_total, na.rm = TRUE),
              sd = sd(race_gender_total, na.rm = TRUE)),
  mapping = aes(x = time_gender, y = mean, linetype = factor(same_gender))
) +
  geom_line() +
  # optional error bars, left disabled:
  # geom_pointrange(aes(x=time_gender, y=mean, ymin = mean-sd, ymax=mean+sd)) +
  scale_x_continuous(breaks = c(-1, 0, 1)) +
  scale_linetype_discrete(name = '',
                          labels = c('Female Contributions', 'Male Contributions')) +
  labs(x = 'Election Cycles Around Female Candidate',
       y = 'Average Contribution Total') +
  theme_classic() +
  theme(legend.position = 'bottom', text = element_text(size = 20))
ggsave('./Output/parallel trends gender.pdf', width = 11, height = 8)

# Parallel-trends figure for race: mean contribution total by event time
# (time_race), one line per same_race value. Rows with a missing same_race or
# time_race are dropped before averaging.
ggplot(
  data = plotdata %>%
    filter(!is.na(same_race), !is.na(time_race)) %>%
    group_by(time_race, same_race) %>%
    summarise(mean = mean(race_gender_total, na.rm = TRUE),
              sd = sd(race_gender_total, na.rm = TRUE)),
  mapping = aes(x = time_race, y = mean, linetype = factor(same_race))
) +
  geom_line() +
  # optional error bars, left disabled:
  # geom_pointrange(aes(x=time_gender, y=mean, ymin = mean-sd, ymax=mean+sd)) +
  scale_x_continuous(breaks = c(-1, 0, 1)) +
  scale_linetype_discrete(name = '',
                          labels = c('Contributions of Color', 'White Contributions')) +
  labs(x = 'Election Cycles Around Candidate of Color',
       y = 'Average Contribution Total') +
  theme_classic() +
  theme(legend.position = 'bottom', text = element_text(size = 20))
ggsave('./Output/parallel trends race.pdf', width = 11, height = 8)

# Mean contribution total by event time, pooled over all groups.
# time_race and time_race_gender are created on plotdata, not outcomes_long,
# so plotdata is the data source here. Rows without an event time are dropped.
ggplot(data = plotdata %>%
         filter(!is.na(time_race)) %>%
         group_by(time_race) %>%
         summarise(mean = mean(race_gender_total, na.rm = TRUE)),
       aes(x = time_race, y = mean)) +
  geom_line() +
  theme_classic()

ggplot(data = plotdata %>%
         filter(!is.na(time_race_gender)) %>%
         group_by(time_race_gender) %>%
         summarise(mean = mean(race_gender_total, na.rm = TRUE)),
       aes(x = time_race_gender, y = mean)) +
  geom_line() +
  theme_classic()

# District percent white against log(total contributions + 1), stacked so the
# general and primary elections become two facets.
plotdata <- local({
  # negative pct_W values are set to zero
  floored <- outcomes_all %>%
    mutate(pct_W = ifelse(pct_W < 0, 0, pct_W))

  general_panel <- floored %>%
    mutate(name = "General Election",
           logtotal = log(general.total.all + 1)) %>%
    select(pct_W, logtotal, name)

  primary_panel <- floored %>%
    mutate(name = "Primary Election",
           logtotal = log(primary.total.all + 1)) %>%
    select(pct_W, logtotal, name)

  rbind(general_panel, primary_panel)
})

# descriptive plots
ggplot(data = plotdata, mapping = aes(x = pct_W, y = logtotal)) +
  geom_point(alpha = .1) +
  geom_smooth() +
  facet_wrap(~ name) +
  scale_y_continuous(limits = c(5, 16)) +
  labs(x = "District Percent White", y = "Log Total Contributions") +
  theme_classic()
ggsave("./Output/district_race_contribtotal.pdf", width = 5, height = 5)

# District blue-collar and union percentages against
# log(primary + general contributions + 1), one facet per measure.
plotdata <- local({
  with_total <- outcomes_all %>%
    mutate(logtotal = log(primary.total.all + general.total.all + 1))

  # negative percentages are set to zero
  blue_collar_panel <- with_total %>%
    mutate(d_pct = ifelse(d_pct_blucllr < 0, 0, d_pct_blucllr),
           name = "Blue Collar") %>%
    select(d_pct, name, logtotal)

  union_panel <- with_total %>%
    mutate(d_pct = ifelse(d_pct_union < 0, 0, d_pct_union),
           name = "Union Member") %>%
    select(d_pct, name, logtotal)

  rbind(blue_collar_panel, union_panel)
})

# descriptive plots
ggplot(data = plotdata, mapping = aes(x = d_pct, y = logtotal)) +
  geom_point(alpha = .1) +
  geom_smooth() +
  facet_wrap(~ name, scales = 'free_x') +
  scale_y_continuous(limits = c(5, 16)) +
  labs(x = "Percent of District", y = "Log Total Contributions") +
  theme_classic()
ggsave("./Output/district_class_contribtotal.pdf", width = 5, height = 5)

# For each race code, pair the district percentage (pct_<code>) with that
# group's share of all primary + general contributions, then stack the four
# groups into one frame for faceting.
plotdata <- local({
  race_labels <- c(A = "Asian", B = "Black", H = "Latino", W = "White")
  combined_total <- outcomes_all$general.total.all + outcomes_all$primary.total.all

  panels <- lapply(names(race_labels), function(code) {
    group_total <- outcomes_all[, paste0("general.total.", code, ".all")] +
      outcomes_all[, paste0("primary.total.", code, ".all")]
    data.frame(d_share = outcomes_all[, paste0("pct_", code)],
               contrib_share = group_total / combined_total,
               name = race_labels[[code]],
               stringsAsFactors = FALSE)
  })

  do.call(rbind, panels)
})

ggplot(data = plotdata, mapping = aes(x = d_share, y = contrib_share)) +
  geom_point(alpha = .1) +
  geom_smooth() +
  facet_wrap(~ name) +
  xlim(0, 100) +
  ylim(0, 1) +
  labs(x = "District Percent Ethnorace",
       y = "Share of Contributions from Ethnoracial Group") +
  theme_classic()
# the same figure is written under two file names
ggsave("./Output/district_race_contribshare.pdf", width = 5, height = 5)
ggsave("./Output/share_by_race_nonwhite_new.pdf", width = 5, height = 5)

## FIGURE 2 ##
# Per-cycle (through 2010) contribution sums by race code, separately for the
# ".1" columns (labelled "Female Contributors") and the ".0" columns (labelled
# "Male Contributors"), each divided by the total over all contributions and
# reshaped to one row per cycle x name x group.
# The male Asian primary column uses the same ".all" suffix as every other
# column; the original referenced primary.total.A.0 without it.
plotdata <- outcomes_all %>%
  filter(cycle <= 2010) %>%
  group_by(cycle) %>%
  summarise(contrib = sum(primary.total.all, na.rm = TRUE) + sum(general.total.all, na.rm = TRUE),
            contrib_A = sum(primary.total.A.1.all, na.rm = TRUE) + sum(general.total.A.1.all, na.rm = TRUE),
            contrib_B = sum(primary.total.B.1.all, na.rm = TRUE) + sum(general.total.B.1.all, na.rm = TRUE),
            contrib_L = sum(primary.total.H.1.all, na.rm = TRUE) + sum(general.total.H.1.all, na.rm = TRUE),
            contrib_W = sum(primary.total.W.1.all, na.rm = TRUE) + sum(general.total.W.1.all, na.rm = TRUE)) %>%
  mutate(name = "Female Contributors") %>%
  bind_rows(outcomes_all %>%
              filter(cycle <= 2010) %>%
              group_by(cycle) %>%
              summarise(contrib = sum(primary.total.all, na.rm = TRUE) + sum(general.total.all, na.rm = TRUE),
                        contrib_A = sum(primary.total.A.0.all, na.rm = TRUE) + sum(general.total.A.0.all, na.rm = TRUE),
                        contrib_B = sum(primary.total.B.0.all, na.rm = TRUE) + sum(general.total.B.0.all, na.rm = TRUE),
                        contrib_L = sum(primary.total.H.0.all, na.rm = TRUE) + sum(general.total.H.0.all, na.rm = TRUE),
                        contrib_W = sum(primary.total.W.0.all, na.rm = TRUE) + sum(general.total.W.0.all, na.rm = TRUE)) %>%
              mutate(name = "Male Contributors")) %>%
  mutate(share_A = contrib_A / contrib,
         share_B = contrib_B / contrib,
         share_L = contrib_L / contrib,
         share_W = contrib_W / contrib) %>%
  select(cycle, name, share_A:share_W) %>%
  gather(group, share, share_A:share_W)

# Stacked bars of the non-white contribution shares by cycle, one panel per
# contributor gender label.
ggplot(data = plotdata %>% filter(group != "share_W"),
       mapping = aes(x = cycle, y = share, fill = group)) +
  geom_col() +
  facet_grid(cols = vars(name)) +
  scale_fill_grey(labels = c("Asian", "Black", "Latino")) +
  labs(x = 'Year', y = 'Prop. of Contributions') +
  theme_classic() +
  theme(legend.title = element_blank(), legend.position = "bottom")
ggsave("./Output/share_by_race_gender_nonwhite_new.pdf", width = 5, height = 5)

# Share of contributions from the share_W / "Female Contributors" series by
# cycle. Only one fill level is present, so the scale gets a single matching
# label (the original supplied three labels for other groups); the legend
# itself is hidden.
ggplot(plotdata %>% filter(group == "share_W" & name == "Female Contributors")) +
  geom_col(aes(x = cycle, y = share, fill = group)) +
  theme_classic() +
  labs(x = 'Year', y = 'Prop. of Contributions from White Women') +
  scale_fill_grey(labels = "White") +
  #facet_grid(.~ name) +
  theme(legend.title = element_blank(), legend.position = "none")
ggsave("./Output/share_white_women.pdf", width = 5, height = 5)

# Stacked bars of all four race-group shares by cycle for the
# "Female Contributors" rows only.
ggplot(data = plotdata %>% filter(name == "Female Contributors"),
       mapping = aes(x = cycle, y = share, fill = group)) +
  geom_col() +
  scale_fill_grey(labels = c("Asian", "Black", "Latino", "White")) +
  labs(x = 'Year', y = 'Prop. of Contributions from Women') +
  theme_classic() +
  # faceting by name is not needed with a single gender:
  # facet_grid(.~ name) +
  theme(legend.title = element_blank(), legend.position = "bottom")
ggsave("./Output/share_by_race_women.pdf", width = 5, height = 5)

# Per-cycle (through 2010) contribution sums by race code, separately for the
# ".1" columns (labelled "Female Contributors") and the ".0" columns (labelled
# "Male Contributors"). Here each share is taken relative to that gender's own
# total (primary/general.total.1.all or .0.all), not to all contributions.
# The male Asian primary column uses the same ".all" suffix as every other
# column; the original referenced primary.total.A.0 without it.
plotdata <- outcomes_all %>%
  filter(cycle <= 2010) %>%
  group_by(cycle) %>%
  summarise(contrib = sum(primary.total.1.all, na.rm = TRUE) + sum(general.total.1.all, na.rm = TRUE),
            contrib_A = sum(primary.total.A.1.all, na.rm = TRUE) + sum(general.total.A.1.all, na.rm = TRUE),
            contrib_B = sum(primary.total.B.1.all, na.rm = TRUE) + sum(general.total.B.1.all, na.rm = TRUE),
            contrib_L = sum(primary.total.H.1.all, na.rm = TRUE) + sum(general.total.H.1.all, na.rm = TRUE),
            contrib_W = sum(primary.total.W.1.all, na.rm = TRUE) + sum(general.total.W.1.all, na.rm = TRUE)) %>%
  mutate(name = "Female Contributors") %>%
  bind_rows(outcomes_all %>%
              filter(cycle <= 2010) %>%
              group_by(cycle) %>%
              summarise(contrib = sum(primary.total.0.all, na.rm = TRUE) + sum(general.total.0.all, na.rm = TRUE),
                        contrib_A = sum(primary.total.A.0.all, na.rm = TRUE) + sum(general.total.A.0.all, na.rm = TRUE),
                        contrib_B = sum(primary.total.B.0.all, na.rm = TRUE) + sum(general.total.B.0.all, na.rm = TRUE),
                        contrib_L = sum(primary.total.H.0.all, na.rm = TRUE) + sum(general.total.H.0.all, na.rm = TRUE),
                        contrib_W = sum(primary.total.W.0.all, na.rm = TRUE) + sum(general.total.W.0.all, na.rm = TRUE)) %>%
              mutate(name = "Male Contributors")) %>%
  mutate(share_A = contrib_A / contrib,
         share_B = contrib_B / contrib,
         share_L = contrib_L / contrib,
         share_W = contrib_W / contrib) %>%
  select(cycle, name, share_A:share_W) %>%
  gather(group, share, share_A:share_W)

# Stacked bars of the non-white shares of each gender's own contributions by
# cycle, one panel per contributor gender label.
ggplot(data = plotdata %>% filter(group != "share_W"),
       mapping = aes(x = cycle, y = share, fill = group)) +
  geom_col() +
  facet_grid(cols = vars(name)) +
  scale_fill_grey(labels = c("Asian", "Black", "Latino")) +
  labs(x = 'Year', y = 'Prop. of Contributions by Gender') +
  theme_classic() +
  theme(legend.title = element_blank(), legend.position = "bottom")
ggsave("./Output/share_gender_by_race_gender_nonwhite.pdf", width = 5, height = 5)